library(schrute)
library(tidyverse)
library(tidymodels)
# Preview the `theoffice` dataset provided by the schrute package.
schrute::theoffice %>%
  tibble()

# Download the per-episode ratings table (TidyTuesday, 2020-03-17) and
# print it for a quick look.
office_ratings <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-17/office_ratings.csv"
)
office_ratings

# Pattern removed from episode names (punctuation, digits, "part(s) ",
# "the ", "and") when they are cleaned with `str_remove_all()`.
remove_regex <- "[:punct:]|[:digit:]|parts |part |the |and"
# Mean IMDb rating per season, drawn as a line chart.
office_ratings %>%
  group_by(season) %>%
  summarise(avg_rating = mean(imdb_rating)) %>%
  ggplot(mapping = aes(x = season, y = avg_rating)) +
  geom_line() +
  # One axis tick for each of the values 1 through 9.
  scale_x_continuous(breaks = seq_len(9))
# Ratings table with a join key added.
#
# `episode_name` is derived from `title` using exactly the same cleaning
# steps that are applied to `schrute::theoffice$episode_name` when building
# `office_info` (lower-case, strip `remove_regex`, trim). Without this the
# two `episode_name` columns never agree and joining on them yields no rows.
#
# `mutate()` (rather than `transmute()`) is used on purpose: the remaining
# columns (`season`, `episode`, `title`, `imdb_rating`, ...) are still needed
# by the plots further down.
raw_ratings <- office_ratings %>%
  mutate(
    episode_name = str_to_lower(title),
    episode_name = str_remove_all(episode_name, remove_regex),
    episode_name = str_trim(episode_name)
  )
# Per-row episode metadata from schrute: numeric season/episode, a cleaned
# `episode_name`, plus the director, writer and character columns.
office_info <- schrute::theoffice %>%
  mutate(
    season = as.numeric(season),
    episode = as.numeric(episode),
    # Lower-case, strip everything matched by `remove_regex`, then trim
    # surrounding whitespace.
    episode_name = episode_name %>%
      str_to_lower() %>%
      str_remove_all(remove_regex) %>%
      str_trim()
  ) %>%
  select(season, episode, episode_name, director, writer, character)
# Wide table: one row per episode, one column per character, each cell
# holding the number of `office_info` rows for that character in that
# episode (0 when the character does not appear).
characters <- office_info %>%
  count(episode_name, character) %>%
  # Keep only characters with more than 800 rows across all episodes.
  group_by(character) %>%
  filter(sum(n) > 800) %>%
  ungroup() %>%
  pivot_wider(
    names_from = character,
    values_from = n,
    values_fill = list(n = 0)
  )
# Wide 0/1 indicator table: one row per episode, one column per director or
# writer, 1 when that person is credited on the episode.
creators <- office_info %>%
  distinct(episode_name, director, writer) %>%
  # Stack the director and writer columns into a single `person` column.
  pivot_longer(director:writer, names_to = "role", values_to = "person") %>%
  # A credit may list several people separated by ";" -- one row each.
  separate_rows(person, sep = ";") %>%
  # Keep only people with more than 20 credit rows.
  group_by(person) %>%
  filter(n() > 20) %>%
  ungroup() %>%
  distinct(episode_name, person) %>%
  mutate(person_value = 1) %>%
  pivot_wider(
    names_from = person,
    values_from = person_value,
    values_fill = list(person_value = 0)
  )
# Modelling table: one row per episode with its character line counts,
# creator indicators and IMDb rating.
#
# Every join is keyed explicitly on `episode_name`. Relying on the implicit
# natural join would silently add extra keys if a pivoted character/person
# column ever shared a name with `season` or `episode`, and it prints a
# "Joining, by = ..." message for each step.
#
# NOTE: rows only match when `episode_name` is cleaned the same way in
# `raw_ratings` as it is in `office_info`.
office <- office_info %>%
  distinct(season, episode, episode_name) %>%
  inner_join(characters, by = "episode_name") %>%
  inner_join(creators, by = "episode_name") %>%
  inner_join(
    raw_ratings %>%
      select(episode_name, imdb_rating),
    by = "episode_name"
  )
# Mean IMDb rating per season, this time computed from `raw_ratings`.
raw_ratings %>%
  group_by(season) %>%
  summarise(avg_rating = mean(imdb_rating)) %>%
  ggplot(mapping = aes(x = season, y = avg_rating)) +
  geom_line() +
  # One axis tick for each of the values 1 through 9.
  scale_x_continuous(breaks = seq_len(9))
# Dot plot of the 20 highest-rated episodes, coloured by season.
raw_ratings %>%
  arrange(desc(imdb_rating)) %>%
  mutate(
    # Label each episode as "<season>.<episode> <title>" and order the
    # factor levels by rating so the axis is sorted.
    title = fct_reorder(
      paste0(season, ".", episode, " ", title),
      imdb_rating
    )
  ) %>%
  head(n = 20) %>%
  ggplot(mapping = aes(x = title, y = imdb_rating, color = factor(season))) +
  geom_point() +
  # Flip the axes so the episode labels run down the side.
  coord_flip()
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkoc2NocnV0ZSkNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeSh0aWR5bW9kZWxzKQ0KYGBgDQoNCmBgYHtyfQ0KdGliYmxlKHRoZW9mZmljZSkNCg0Kb2ZmaWNlX3JhdGluZ3MgPC0gcmVhZHI6OnJlYWRfY3N2KCdodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vcmZvcmRhdGFzY2llbmNlL3RpZHl0dWVzZGF5L21hc3Rlci9kYXRhLzIwMjAvMjAyMC0wMy0xNy9vZmZpY2VfcmF0aW5ncy5jc3YnKQ0KDQoNCm9mZmljZV9yYXRpbmdzDQoNCg0KcmVtb3ZlX3JlZ2V4IDwtICJbOnB1bmN0Ol18WzpkaWdpdDpdfHBhcnRzIHxwYXJ0IHx0aGUgfGFuZCINCmBgYA0KDQpgYGB7cn0NCm9mZmljZV9yYXRpbmdzICU+JQ0KICBncm91cF9ieShzZWFzb24pICU+JQ0KICBzdW1tYXJpc2UoYXZnX3JhdGluZyA9IG1lYW4oaW1kYl9yYXRpbmcpKSAlPiUNCiAgZ2dwbG90KGFlcyhzZWFzb24sIGF2Z19yYXRpbmcpKSArDQogIGdlb21fbGluZSgpICsNCiAgc2NhbGVfeF9jb250aW51b3VzKGJyZWFrcyA9IDE6OSkNCmBgYA0KDQpgYGB7cn0NCnJhd19yYXRpbmdzIDwtIG9mZmljZV9yYXRpbmdzICU+JQ0KICAgICBtdXRhdGUoDQogICAgICAgICBlcGlzb2RlX25hbWUgPSB0aXRsZSwNCiAgICAgICBlcGlzb2RlX25hbWUgPSBlcGlzb2RlX25hbWUsDQogICAgICAgICBlcGlzb2RlX25hbWUgPSBlcGlzb2RlX25hbWUsDQogICAgICAgICBpbWRiX3JhdGluZw0KICAgICApDQpgYGANCg0KYGBge3J9DQpvZmZpY2VfaW5mbyA8LSBzY2hydXRlOjp0aGVvZmZpY2UgJT4lDQogIG11dGF0ZSgNCiAgICBzZWFzb24gPSBhcy5udW1lcmljKHNlYXNvbiksDQogICAgZXBpc29kZSA9IGFzLm51bWVyaWMoZXBpc29kZSksDQogICAgZXBpc29kZV9uYW1lID0gc3RyX3RvX2xvd2VyKGVwaXNvZGVfbmFtZSksDQogICAgZXBpc29kZV9uYW1lID0gc3RyX3JlbW92ZV9hbGwoZXBpc29kZV9uYW1lLCByZW1vdmVfcmVnZXgpLA0KICAgIGVwaXNvZGVfbmFtZSA9IHN0cl90cmltKGVwaXNvZGVfbmFtZSkNCiAgKSAlPiUNCiAgc2VsZWN0KHNlYXNvbiwgZXBpc29kZSwgZXBpc29kZV9uYW1lLCBkaXJlY3Rvciwgd3JpdGVyLCBjaGFyYWN0ZXIpDQpgYGANCg0KDQoNCmBgYHtyfQ0KY2hhcmFjdGVycyA8LSBvZmZpY2VfaW5mbyAlPiUNCiAgY291bnQoZXBpc29kZV9uYW1lLCBjaGFyYWN0ZXIpICU+JQ0KICBhZGRfY291bnQoY2hhcmFjdGVyLCB3dCA9IG4sIG5hbWUgPSAiY2hhcmFjdGVyX2NvdW50IikgJT4lDQogIGZpbHRlcihjaGFyYWN0ZXJfY291bnQgPiA4MDApICU+JQ0KICBzZWxlY3QoLWNoYXJhY3Rlcl9jb3VudCkgJT4lDQogIHBpdm90X3dpZGVyKA0KICAgIG5hbWVzX2Zyb20gPSBjaGFyYWN0ZXIsDQogICAgdmFsdWVzX2Zyb20gPSBuLA0KICAgIHZhbHVlc19maWxsID0gbGlzdChuID0gMCkNCiAgKQ0KYGBgDQoNCmBgYHtyfQ0KY3Jl
YXRvcnMgPC0gb2ZmaWNlX2luZm8gJT4lDQogIGRpc3RpbmN0KGVwaXNvZGVfbmFtZSwgZGlyZWN0b3IsIHdyaXRlcikgJT4lDQogIHBpdm90X2xvbmdlcihkaXJlY3Rvcjp3cml0ZXIsIG5hbWVzX3RvID0gInJvbGUiLCB2YWx1ZXNfdG8gPSAicGVyc29uIikgJT4lDQogIHNlcGFyYXRlX3Jvd3MocGVyc29uLCBzZXAgPSAiOyIpICU+JQ0KICBhZGRfY291bnQocGVyc29uKSAlPiUNCiAgZmlsdGVyKG4gPiAyMCkgJT4lDQogIGRpc3RpbmN0KGVwaXNvZGVfbmFtZSwgcGVyc29uKSAlPiUNCiAgbXV0YXRlKHBlcnNvbl92YWx1ZSA9IDEpICU+JQ0KICBwaXZvdF93aWRlcigNCiAgICBuYW1lc19mcm9tID0gcGVyc29uLA0KICAgIHZhbHVlc19mcm9tID0gcGVyc29uX3ZhbHVlLA0KICAgIHZhbHVlc19maWxsID0gbGlzdChwZXJzb25fdmFsdWUgPSAwKQ0KICApDQpgYGANCg0KDQpgYGB7cn0NCm9mZmljZSA8LSBvZmZpY2VfaW5mbyAlPiUNCiAgIGRpc3RpbmN0KHNlYXNvbiwgZXBpc29kZSwgZXBpc29kZV9uYW1lKSAlPiUNCiAgIGlubmVyX2pvaW4oY2hhcmFjdGVycykgJT4lDQogICAgIGlubmVyX2pvaW4oY3JlYXRvcnMpICU+JQ0KICAgICBpbm5lcl9qb2luKHJhd19yYXRpbmdzICU+JQ0KICAgICAgICAgICAgICAgICAgICBzZWxlY3QoZXBpc29kZV9uYW1lLCBpbWRiX3JhdGluZykpDQpgYGANCg0KYGBge3J9DQpyYXdfcmF0aW5ncyAlPiUNCiAgZ3JvdXBfYnkoc2Vhc29uKSAlPiUNCiAgc3VtbWFyaXNlKGF2Z19yYXRpbmcgPSBtZWFuKGltZGJfcmF0aW5nKSkgJT4lDQogIGdncGxvdChhZXMoc2Vhc29uLCBhdmdfcmF0aW5nKSkgKw0KICBnZW9tX2xpbmUoKSArDQogIHNjYWxlX3hfY29udGludW91cyhicmVha3MgPSAxOjkpDQpgYGANCg0KYGBge3J9DQpyYXdfcmF0aW5ncyAlPiUNCiAgICAgICAgICAgYXJyYW5nZShkZXNjKGltZGJfcmF0aW5nKSklPiUNCiAgICAgICAgICAgbXV0YXRlKHRpdGxlID0gcGFzdGUwKHNlYXNvbiwiLiIsIGVwaXNvZGUsIiAiLHRpdGxlKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGl0bGUgPSBmY3RfcmVvcmRlcih0aXRsZSwgaW1kYl9yYXRpbmcpKSAlPiUNCiAgICAgICAgICAgICAgaGVhZCgyMCklPiUNCiAgICAgZ2dwbG90KGFlcyh0aXRsZSwgaW1kYl9yYXRpbmcsIGNvbG9yID0gZmFjdG9yKHNlYXNvbikpKSArDQogICAgICAgICBnZW9tX3BvaW50KCkgKyBjb29yZF9mbGlwKCkNCg0KYGBgDQo=